In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import neighbors
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,classification_report
from sklearn import metrics

In [2]:
from warnings import filterwarnings
filterwarnings('ignore')

In [3]:
df=pd.read_csv('./covid19_dataset.csv')
df=df.fillna(df.mean())
y=df["INTUBATION"]
z=df["INTENSIVE CARE"]
ID=df["ID"]
X=df.drop(["INTUBATION","INTENSIVE CARE","ID"],axis=1)
df["INTUBATION"].value_counts()


NO     1350
YES      89
Name: INTUBATION, dtype: int64

In [4]:
df["INTENSIVE CARE"].value_counts()
df.head

<bound method NDFrame.head of         ID      HEIGHT     WEIGHT INTENSIVE CARE INTUBATION  WBC 1  NE# 1  \
0        1  164.580078  74.953488             NO         NO   9.08   5.27   
1        2  174.000000  87.000000             NO         NO  11.31   7.86   
2        4  182.000000  93.000000             NO         NO  12.42   7.05   
3        6  164.580078  74.953488             NO         NO   8.19   5.25   
4       12  164.580078  74.953488             NO         NO   9.56   7.25   
...    ...         ...        ...            ...        ...    ...    ...   
1434  3650  164.580078  74.953488             NO         NO   7.23   3.27   
1435  3651  164.580078  74.953488             NO         NO  10.31   6.68   
1436  3652  164.580078  74.953488             NO         NO   7.77   6.65   
1437  3653  164.580078  74.953488             NO         NO   3.71   2.78   
1438  3654  164.580078  74.953488             NO         NO  11.52   9.34   

      LY# 1  MO# 1  EO# 1  ...  SEDIM 1  PT 1

## INTUBATION

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25,random_state=42)

In [6]:
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

In [7]:
rf_model

RandomForestClassifier(random_state=42)

## Model Tuning

In [8]:
rf_params={"max_depth":[5,8,10],
          "max_features":[2,5,10],
          "n_estimators":[200,500,1000,2000],
          "min_samples_split":[2,10,80,100]}

In [9]:
rf_cv_model=GridSearchCV(rf_model,rf_params, cv=10,n_jobs=-1,verbose=2).fit(X_train,y_train)

Fitting 10 folds for each of 144 candidates, totalling 1440 fits


In [10]:
rf_cv_model.best_params_

{'max_depth': 8,
 'max_features': 2,
 'min_samples_split': 2,
 'n_estimators': 2000}

In [11]:
rf_model = RandomForestClassifier(random_state=42,
                                max_depth=8,
                                max_features=2,
                                min_samples_split=2,
                                n_estimators=2000)
rf_tuned=rf_model.fit(X_train, y_train)

In [18]:
rf_tuned

RandomForestClassifier(max_depth=8, max_features=2, n_estimators=2000,
                       random_state=42)

In [19]:
y_pred =rf_tuned.predict(X_test)
final=pd.DataFrame({'INTUBATION':y_test, 'INTUBATION_PREDICT':y_pred})

In [20]:
final.head()

Unnamed: 0,INTUBATION,INTUBATION_PREDICT
168,NO,NO
605,NO,NO
548,NO,NO
65,NO,NO
628,NO,NO


In [22]:
confusion = metrics.confusion_matrix(final.INTUBATION, final.INTUBATION_PREDICT)
confusion

array([[339,   1],
       [ 17,   3]], dtype=int64)

In [23]:
TP = confusion[1,1] # true positive 
TN = confusion[0,0] # true negatives
FP = confusion[0,1] # false positives
FN = confusion[1,0] # false negatives

In [24]:
"Senvitiy:" 
TP / float(TP+FN)

0.15

In [25]:
"Specificity:"
TN / float(TN+FP)

0.9970588235294118

In [26]:
cl_report= classification_report(y_test,y_pred)

In [27]:
print(cl_report)

              precision    recall  f1-score   support

          NO       0.95      1.00      0.97       340
         YES       0.75      0.15      0.25        20

    accuracy                           0.95       360
   macro avg       0.85      0.57      0.61       360
weighted avg       0.94      0.95      0.93       360



## INTENSIVE CARE

In [28]:
X_train, X_test, z_train, z_test = train_test_split(X,z,test_size=0.25,random_state=42)

In [29]:
rf_modelIC = RandomForestClassifier(random_state=42).fit(X_train, z_train)
rf_modelIC

RandomForestClassifier(random_state=42)

In [30]:
rfIC_params={"max_depth":[5,8,10],
          "max_features":[2,5,10],
          "n_estimators":[200,500,1000,2000],
          "min_samples_split":[2,10,80,100]}

In [31]:
rf_cv_modelIC=GridSearchCV(rf_modelIC,rfIC_params, cv=10,n_jobs=-1,verbose=2).fit(X_train,z_train)

Fitting 10 folds for each of 144 candidates, totalling 1440 fits


In [32]:
rf_cv_modelIC.best_params_

{'max_depth': 8,
 'max_features': 10,
 'min_samples_split': 10,
 'n_estimators': 1000}

In [33]:
rf_modelIC = RandomForestClassifier(random_state=42,
                                max_depth=8,
                                max_features=10,
                                min_samples_split=10,
                                n_estimators=2000)
rfIC_tuned=rf_cv_modelIC.fit(X_train, z_train)

Fitting 10 folds for each of 144 candidates, totalling 1440 fits


In [34]:
rfIC_tuned

GridSearchCV(cv=10, estimator=RandomForestClassifier(random_state=42),
             n_jobs=-1,
             param_grid={'max_depth': [5, 8, 10], 'max_features': [2, 5, 10],
                         'min_samples_split': [2, 10, 80, 100],
                         'n_estimators': [200, 500, 1000, 2000]},
             verbose=2)

In [35]:
z_pred =rfIC_tuned.predict(X_test)
finalIC=pd.DataFrame({'INTENSIVECARE':z_test, 'INTENSIVECARE_PREDICT':z_pred})

In [36]:
finalIC

Unnamed: 0,INTENSIVECARE,INTENSIVECARE_PREDICT
168,NO,NO
605,NO,NO
548,NO,NO
65,NO,NO
628,NO,NO
...,...,...
100,NO,NO
382,NO,NO
618,NO,NO
1078,NO,NO


In [37]:
confusion = metrics.confusion_matrix(finalIC.INTENSIVECARE, finalIC.INTENSIVECARE_PREDICT)
confusion

array([[313,   6],
       [ 22,  19]], dtype=int64)

In [38]:
TP = confusion[1,1] # true positive 
TN = confusion[0,0] # true negatives
FP = confusion[0,1] # false positives
FN = confusion[1,0] # false negatives

In [39]:
"Senvitiy:" 
TP / float(TP+FN)

0.4634146341463415

In [40]:
"Specificity:"
TN / float(TN+FP)

0.9811912225705329