In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
# from tqdm import tnrange, tqdm_notebook

In [3]:
# Load the dataset CSV into a DataFrame and display it.
DATA_PATH = "data/Surgical-deepnet.csv"
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,bmi,Age,asa_status,baseline_cancer,baseline_charlson,baseline_cvd,baseline_dementia,baseline_diabetes,baseline_digestive,baseline_osteoart,...,complication_rsi,dow,gender,hour,month,moonphase,mort30,mortality_rsi,race,complication
0,19.31,59.2,1,1,0,0,0,0,0,0,...,-0.57,3,0,7.63,6,1,0,-0.43,1,0
1,18.73,59.1,0,0,0,0,0,0,0,0,...,0.21,0,0,12.93,0,1,0,-0.41,1,0
2,21.85,59.0,0,0,0,0,0,0,0,0,...,0.00,2,0,7.68,5,3,0,0.08,1,0
3,18.49,59.0,1,0,1,0,0,1,1,0,...,-0.65,2,1,7.58,4,3,0,-0.32,1,0
4,19.70,59.0,1,0,0,0,0,0,0,0,...,0.00,0,0,7.88,11,0,0,0.00,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14630,18.79,14.1,1,0,1,0,0,0,0,0,...,-0.54,1,0,7.78,0,1,0,-0.16,1,1
14631,19.65,12.6,0,0,0,0,0,0,0,0,...,-1.42,4,0,8.40,6,1,0,-0.77,1,1
14632,14.84,12.6,1,0,0,0,0,0,0,0,...,0.65,0,0,13.25,3,3,0,0.99,1,1
14633,17.75,8.9,0,0,1,0,0,0,1,0,...,-0.50,0,1,8.30,5,0,0,0.17,1,1


In [4]:
# Count missing values in every column to confirm no imputation is needed.
missing_per_column = df.isnull().sum()
missing_per_column

bmi                    0
Age                    0
asa_status             0
baseline_cancer        0
baseline_charlson      0
baseline_cvd           0
baseline_dementia      0
baseline_diabetes      0
baseline_digestive     0
baseline_osteoart      0
baseline_psych         0
baseline_pulmonary     0
ahrq_ccs               0
ccsComplicationRate    0
ccsMort30Rate          0
complication_rsi       0
dow                    0
gender                 0
hour                   0
month                  0
moonphase              0
mort30                 0
mortality_rsi          0
race                   0
complication           0
dtype: int64

In [5]:
# Separate the feature matrix from the `complication` target column.
X = df.drop('complication', axis=1).copy().values
Y = df['complication'].copy().values

# Pin the shuffle seed so the split (and every metric computed from it) is
# the same after Restart Kernel -> Run All.
RANDOM_STATE = 42
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=RANDOM_STATE
)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((11708, 24), (2927, 24), (11708,), (2927,))

In [6]:
# Turn both label vectors into (n_samples, 1) columns, the same layout that
# Perceptron.predict returns, so element-wise comparisons line up row by row.
Y_train, Y_test = (labels.reshape(-1, 1) for labels in (Y_train, Y_test))
Y_train.shape, Y_test.shape

((11708, 1), (2927, 1))

In [7]:

class Perceptron:
    """Single-neuron binary classifier trained with per-sample gradient steps.

    The neuron outputs ``sigmoid(x @ w + b)``. Each training sample updates
    the parameters with ``(y_pred - y) * x`` for the weights and
    ``y_pred - y`` for the bias, scaled by ``learning_rate``. That update is
    the gradient of binary cross-entropy through a sigmoid; it is not adapted
    to the other activations offered by :meth:`activation`.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every parameter update.
    input_length : int
        Number of input features; the weight vector has this length.
    seed : int, optional
        When given, the initial weights and bias are drawn from a private
        ``np.random.RandomState(seed)`` so initialisation is reproducible.
        When omitted, they are drawn from NumPy's global random state.

    Attributes
    ----------
    w : numpy.ndarray of shape (input_length,)
    b : numpy.ndarray of shape (1,)
    """

    def __init__(self, learning_rate, input_length, seed=None):
        self.learning_rate = learning_rate
        # The global np.random module and a RandomState expose the same
        # rand() API, so the default path keeps honouring np.random.seed().
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.w = rng.rand(input_length)
        self.b = rng.rand(1)

    def activation(self, x, function="sigmoid"):
        """Apply the named activation element-wise to ``x``.

        Supported names: ``'sigmoid'``, ``'relu'``, ``'tanh'``, ``'linear'``.

        Raises
        ------
        ValueError
            If ``function`` is not one of the supported names.
        """
        if function == 'sigmoid':
            # For very negative x, exp(-x) overflows to inf and the result is
            # 1 / (1 + inf) == 0.0, which is the correct saturated value.
            # Only the spurious overflow warning is silenced here.
            with np.errstate(over='ignore'):
                return 1 / (1 + np.exp(-x))
        if function == 'relu':
            return np.maximum(0, x)
        if function == 'tanh':
            return np.tanh(x)
        if function == 'linear':
            return x
        raise ValueError("Unknown activation function")

    def forward(self, x):
        """Return the sigmoid output for one sample or a batch of samples."""
        return self.activation(x @ self.w + self.b)

    def back_propagation(self, x, y, y_pred):
        """Return ``(dw, db)`` for a single sample.

        ``dw`` is ``(y_pred - y) * x`` and ``db`` is ``y_pred - y``.
        """
        error = y_pred - y
        dw = error * x
        db = error
        return dw, db

    def update(self, dw, db):
        """Take one gradient-descent step on the weights and the bias."""
        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def fit(self, X_train, Y_train, epochs):
        """Train for ``epochs`` passes, updating after every single sample.

        Samples are visited in the order given; nothing is shuffled.
        """
        for epoch in tqdm(range(epochs)):
            for x, y in zip(X_train, Y_train):
                y_pred = self.forward(x)
                dw, db = self.back_propagation(x, y, y_pred)
                self.update(dw, db)

    def calculate_loss(self, X_test, Y_test, metric='mse'):
        """Return the mean error between predictions and ``Y_test``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, input_length)
        Y_test : array-like of shape (n_samples,) or (n_samples, 1)
        metric : {'mse', 'mae'}

        Raises
        ------
        ValueError
            If ``metric`` is not ``'mse'`` or ``'mae'``.
        """
        Y_pred = self.predict(X_test)
        # predict() returns an (n, 1) column. Without this reshape a 1-D
        # label vector would broadcast against it into an (n, n) matrix and
        # the mean would silently be taken over every prediction/label pair.
        Y_true = np.asarray(Y_test).reshape(-1, 1)
        if metric == 'mse':
            return np.mean(np.square(Y_true - Y_pred))
        if metric == 'mae':
            return np.mean(np.abs(Y_true - Y_pred))
        raise ValueError('Unknown metric')

    def calculate_accuracy(self, X_test, Y_test):
        """Return the fraction of samples whose thresholded prediction
        (output > 0.5 -> 1, else 0) equals the label.

        ``Y_test`` may be of shape (n_samples,) or (n_samples, 1).
        """
        Y_pred = np.where(self.predict(X_test) > 0.5, 1, 0)
        # Same column alignment as in calculate_loss, to avoid an (n, n)
        # broadcast when labels are passed as a flat vector.
        Y_true = np.asarray(Y_test).reshape(-1, 1)
        accuracy = np.mean(Y_pred == Y_true)
        return accuracy

    def predict(self, X_test):
        """Return sigmoid outputs for ``X_test`` as an (n_samples, 1) column."""
        return self.forward(X_test).reshape(-1, 1)

    def evaluate(self, X_test, Y_test):
        """Return ``(mse_loss, accuracy)`` on the given data."""
        loss = self.calculate_loss(X_test, Y_test)
        accuracy = self.calculate_accuracy(X_test, Y_test)
        return loss, accuracy

In [8]:
# Perceptron draws its initial weights from NumPy's global random state, so
# seed it first to make training reproducible across kernel restarts.
np.random.seed(42)
model = Perceptron(learning_rate=0.001, input_length=X.shape[1])
model.fit(X_train, Y_train, epochs=256)

100%|██████████| 256/256 [01:05<00:00,  3.92it/s]


In [9]:
# Loss and accuracy on the held-out test split, in the order evaluate() returns them.
test_loss, test_accuracy = model.evaluate(X_test, Y_test)
test_loss, test_accuracy

(0.16149608724380782, 0.7745131533993851)

In [10]:
# Convert the model outputs into hard 0/1 labels with a 0.5 cut-off.
Y_pred = (model.predict(X_test) > 0.5).astype(int)

In [11]:
# Confusion matrix of the thresholded test predictions.

from sklearn.metrics import confusion_matrix

# Show both shapes first as a quick check that labels and predictions line up.
print(Y_test.shape, Y_pred.shape, sep="\n")
confusion_matrix(y_true=Y_test, y_pred=Y_pred)

(2927, 1)
(2927, 1)


array([[1973,  224],
       [ 436,  294]], dtype=int64)

In [12]:
# Precision and recall of the thresholded test predictions.

from sklearn.metrics import precision_score, recall_score

precision = precision_score(y_true=Y_test, y_pred=Y_pred)
recall = recall_score(y_true=Y_test, y_pred=Y_pred)
print('precision_score:', precision)
print('recall_score:', recall)

precision_score: 0.5675675675675675
recall_score: 0.40273972602739727
